* Session setup: start from an empty session, turn off output paging, and raise
* the maximum number of variables before any data are loaded.
* NOTE(review): the path globals used further down (CensusData, Crosswalks,
* Survey1936, Mydirectory1, Mydirectory2) are not defined anywhere in this file;
* presumably a calling do-file sets them -- confirm before running stand-alone.
clear all
set more off
set maxvar 10000


*---------------------------*
* PART 1: CLEAN CENSUS 
*---------------------------*

    * One pass per Census year 1920, 1930, ..., 1990. Each pass builds the sample
    * of black and white fathers ages 30 to 50 who have a child under 18 in the
    * household, attaches occupation-based income scores, and stores the result
    * in a tempfile named census<year>_fathers30_50 for use later in this file.
    forvalues d = 2/9 {

        local cyear = 1900 + 10*`d'

    * Identify fathers
        * Children under 18 whose father pointer (poploc) is neither 0 nor missing
        use if year==`cyear' using "$CensusData/input/Census_1910to2010_1pct_raw.dta", clear
        keep if age<18
        drop if poploc==0 | poploc==.
        keep serial poploc

        * One row per household x father pointer; a father can have several children
        bysort serial poploc: keep if _n==1
        rename poploc pernum

        tempfile children
        save `children'

        * Keep the sample of fathers: persons whose (serial, pernum) is in the child list
        use if year==`cyear' using "$CensusData/input/Census_1910to2010_1pct_raw.dta", clear
        merge 1:1 serial pernum using `children', keep(match) nogenerate

        * Keep black and white fathers ages 30 to 50
        keep if inrange(age,30,50) & inlist(race,1,2)

        gen agesq = age^2

    * Crosswalk Census occupations to ANES occupations
        sort occ1950
        replace occ1950=. if occ1950>=980

        * Codes 200-290 with classwkr==1 are shifted by 1000 so they map separately
        replace occ1950=occ1950+1000 if inrange(occ1950,200,290) & classwkr==1

        * Crosswalk: every Census row must find a match; unused crosswalk rows are discarded
        merge m:1 occ1950 using "$Crosswalks/Crosswalk_1950Census_toANES.dta", assert(match using) keep(match) nogenerate

        rename occ1950ej fatheroccej

    * Dummy: Southern residence (region codes 31-34); left missing when region>=90
        gen south_merge = inrange(region,31,34) if region<90
        tabulate south_merge, missing
        tabulate region, missing

    * Fix Census weight to have mean 1
        summarize perwt
        local mean_perwt = r(mean)

        gen wgt_sex_race = perwt/`mean_perwt'
        summarize wgt_sex_race

*-----------------------------------------------*
* PART 2: MERGE IN CENSUS INCOME SCORES
*-----------------------------------------------*

    * 1950-1990 (occupation x race x south)
        sort fatheroccej race south_merge
        merge m:1 fatheroccej race south_merge using "$CensusData/output/IncomeScores_Coarsened_byrace_bysouth.dta", keep(master match)
        * Unmatched Census rows are tolerated only when one of the merge keys is missing
        assert fatheroccej==. | race==. | south_merge==. if _merge==1
        drop _merge

    * 1936 income score (occupation x race x south)
        sort fatheroccej race south_merge
        merge m:1 fatheroccej race south_merge using "$Survey1936/output/ConsumptionSurvey_1936_IncomeScores.dta", keep(master match)
        * Here occupation code 99 is also allowed to go unmatched
        assert (fatheroccej==. | fatheroccej==99) | race==. | south_merge==. if _merge==1
        drop _merge
        drop flag_impute_1936 avg_totfaminc_1936_byocc_byr avg_totfaminc_1936_byocc number_obs_cell_1936

    * 1900 income score
        * Code 21 is folded into 28: no self employment distinction in 1900 data
        gen occ1950ej_PH = cond(fatheroccej==21, 28, fatheroccej)

        merge m:1 occ1950ej_PH race south_merge using "$Mydirectory1/1_DataSources/1900_IncomeScores/output/IncomeScores_1900_byrace_bysouth.dta", keep(master match)
        assert fatheroccej==. | race==. | south_merge==. if _merge==1
        drop _merge
        drop netearn00_adj_byrace netearn00_adj_byocc income_PH_farmfix_byrace income_PH_farmfix_byocc

*------------------------------------------------------*
* PART 3: LOG INCOME SCORES (OCC X RACE X SOUTH)
*------------------------------------------------------*

    * Mix 1940 Census with 1936 survey: codes 21 and 81 take the 1936 survey value
        gen father_HHinc_1936fix = cond(inlist(fatheroccej,21,81), avg_totfaminc_1936, avg_HHinc_1940_byrace_bysouth)
        label var father_HHinc_1936fix "Father baseline income score, 1936 farm and self-emp"

        gen log_father_HHinc_1936fix = ln(father_HHinc_1936fix)
        label var log_father_HHinc_1936fix "Logged father's baseline HH income, 1936 farm and self-emp. fix"

    * Mix 1900 sources: code 81 takes netearn00_adj, all others income_PH_farmfix
        gen father_inc_1900_byr_bys = cond(fatheroccej==81, netearn00_adj_byr_bys, income_PH_farmfix_byr_bys)
        label var father_inc_1900_byr_bys "Father income score, 1900 Census of Ag and Preston Haines"

        gen log_father_1900_byr_bys = ln(father_inc_1900_byr_bys)
        label var log_father_1900_byr_bys "Logged father's income, 1900 Census of Ag and Preston Haines"

        * Consistency check: no 1900 score where the 1936/1940 score is missing
        assert log_father_1900_byr_bys==. if log_father_HHinc_1936fix==.

    * 1950-1990
        * No household-level income in 1950, so the inctot-based score is cloned under the HHinc name
        clonevar avg_HHinc_1950_byocc_byr_bys = avg_inctot_1950_byocc_byr_bys
        label var avg_HHinc_1950_byocc_byr_bys "clone of avg_inctot_1950_byocc_byr_bys"

        forvalues t = 1950(10)1990 {
            gen log_father_`t'_byr_bys = ln(avg_HHinc_`t'_byocc_byr_bys)
            label var log_father_`t'_byr_bys "Logged father's household income, `t', by race by south"
        }

*------------------------------------------------------*
* PART 4: ASSIGN INCOME SCORES TO CENSUS FATHERS
*------------------------------------------------------*
 /* General approach: each Census year gets the income score from the
    closest available source; 1920, 1930 and 1950 get a weighted
    average of two sources, with the weights written out below. */
        if `cyear'==1920 local inc "(1/2)*log_father_1900_byr_bys + (1/2)*log_father_HHinc_1936fix"
        if `cyear'==1930 local inc "(1/3)*log_father_1900_byr_bys + (2/3)*log_father_HHinc_1936fix"
        if `cyear'==1940 local inc "log_father_HHinc_1936fix"
        if `cyear'==1950 local inc "(1/2)*log_father_HHinc_1936fix + (1/2)*log_father_1960_byr_bys"
        if `cyear'==1960 local inc "log_father_1960_byr_bys"
        if `cyear'==1970 local inc "log_father_1970_byr_bys"
        if `cyear'==1980 local inc "log_father_1980_byr_bys"
        if `cyear'==1990 local inc "log_father_1990_byr_bys"

        * Both versions carry the same value on the Census side
        gen log_father_interp_rbias_v1 = `inc'
        gen log_father_interp_rbias_v2 = `inc'

    * Save cleaned Census
        keep year fatheroccej race south_merge wgt_sex_race age agesq birthyr log_father_interp_rbias_v1 log_father_interp_rbias_v2
        sort fatheroccej race south_merge

        compress
        tempfile census`cyear'_fathers30_50
        save `census`cyear'_fathers30_50', replace

    }


*------------------------------------------------------*
* PART 6: PREPARE SURVEY DATA
*------------------------------------------------------*

	* Load the pooled survey file, restrict it to the baseline sample, and keep
    * only the variables that line up with the Census father files.
    use "$Mydirectory1/3_Output/2_PooledData_analysis.dta", clear
    keep if baseline_sample==1

    * dob is renamed so it carries the same name as the Census variable birthyr
    rename dob birthyr

    * father_income_baseline and rank_father_baseline are currently left out of the keep list
    keep fatheroccej decade birthyr race south_merge wgt_sex_race age agesq log_father_interp_rbias_v1 log_father_interp_rbias_v2

    tempfile surveys
    save `surveys', replace


*------------------------------------------------------*
* PART 7: MAKE FIGURES
*------------------------------------------------------*

*************************************************
/* PANEL A: Earlier Census 
            (Census taken when respondents were about ages 1 to 10) */
*************************************************

* Append Censuses and survey
    * Stack the eight Census father files (1920-1990), then add the survey file.
    use `census1920_fathers30_50', clear
    forvalues yr = 1930(10)1990 {
        append using `census`yr'_fathers30_50'
    }

    * The Census rows are the master data, so they get survey==0; appended survey rows get survey==1
    append using `surveys', generate(survey)

    * Empty holders for the point estimate and confidence bounds of each panel (1 and 2)
    forvalues k = 1/2 {
        foreach stat in est est_lb est_ub {
            gen `stat'_`k' = .
        }
    }

    * Panel A regressions. For each Census year 1920-1980, regress the v1 father
    * income score on the survey dummy, pooling Census rows from that year with
    * survey rows whose decade is ten years earlier. The coefficient and a
    * +/- 1.96 s.e. interval are written to est_1, est_lb_1 and est_ub_1 on the
    * rows of that decade.

    * Drop observations with missing father income. This does not depend on the
    * loop index, so it is done once here instead of on every pass.
    * NOTE(review): the drop is permanent, so the Panel B regressions further down
    * also run without these rows -- confirm that this is intended.
        drop if log_father_interp_rbias_v1==.

    forvalues d = 2/8 {

        * Census year and matching survey decade are computed from the loop index
        * instead of being spelled out once per decade
        local i = `d'-1
        local census_year  = 1900 + 10*`d'
        local birth_decade = 1900 + 10*`i'

        di "`d'"
        di "`i'"

        reg log_father_interp_rbias_v1 survey  [aw=wgt_sex_race] ///
            if (survey==1 & decade==`birth_decade') | (survey==0 & year==`census_year'), robust
        replace est_1 = _b[survey] if decade==`birth_decade'
        replace est_ub_1 = _b[survey]+1.96*_se[survey] if decade==`birth_decade'
        replace est_lb_1 = _b[survey]-1.96*_se[survey] if decade==`birth_decade'
    }

* Figure (earlier Census)
    * Plot the Panel A coefficients by decade. The data are collapsed to one row
    * per decade and reshaped long inside preserve/restore, so the stacked
    * dataset is back in memory afterwards.
    preserve
        by decade, sort: keep if _n==1
        keep decade est_*

        * Long layout: one row per decade x estimate number (1 or 2)
        reshape long est_ est_lb_ est_ub_, i(decade) j(estimate)

        twoway (scatter est_ decade if estimate==1,  msymbol(circle) mcolor(emerald) msize(medium) yaxis(1)) ///
               (rcap est_lb_  est_ub_  decade if estimate==1, lpatter(solid) lcolor(emerald) yaxis(1) lwidth(0.5)), ///
            yline(0,lpat(dash_dot) lcolor(gs9) lwidth(medthick)) ///
            xti(" " "Decade of respondent's birth") xlabel(1910(10)1970) xscale(range(1905 1975)) ///
            ylabel(-.1(.05)0.1, axis(1)) yti("Coefficient on survey" " ", axis(1)) legend(off) ///
            xlabel(1910 "1910s" 1920 "1920s" 1930 "1930s" 1940 "1940s" 1950 "1950s" 1960 "1960s" 1970 "1970s", labsize(small) )
        graph export "$Mydirectory2/appendix_c/corr_fatherinc_surveydummy_log_series_earliercensus.pdf", as(pdf) replace
    restore

*************************************************
/* PANEL B: Later Census 
            (Census taken when respondents were about ages 11 to 20) */
*************************************************
            
    * Panel B regressions. For each Census year 1930-1990, regress the v2 father
    * income score on the survey dummy, pooling Census rows from that year with
    * survey rows whose decade is twenty years earlier. The coefficient and a
    * +/- 1.96 s.e. interval are written to est_2, est_lb_2 and est_ub_2 on the
    * rows of that decade.

    * Drop observations with missing father income. This does not depend on the
    * loop index, so it is done once here instead of on every pass.
        drop if log_father_interp_rbias_v2==.

    forvalues d = 3/9 {

        * Census year and matching survey decade are computed from the loop index
        * instead of being spelled out once per decade
        local y = `d'-2
        local census_year  = 1900 + 10*`d'
        local birth_decade = 1900 + 10*`y'

        di "`d'"
        di "`y'"

        reg log_father_interp_rbias_v2 survey  [aw=wgt_sex_race] ///
            if (survey==1 & decade==`birth_decade') | (survey==0 & year==`census_year'), robust
        replace est_2 = _b[survey] if decade==`birth_decade'
        replace est_ub_2 = _b[survey]+1.96*_se[survey] if decade==`birth_decade'
        replace est_lb_2 = _b[survey]-1.96*_se[survey] if decade==`birth_decade'

    }

    * Figure (later Census)
    * Plot the Panel B coefficients by decade. The data are collapsed to one row
    * per decade and reshaped long inside preserve/restore, so the stacked
    * dataset is back in memory afterwards.
    preserve
        by decade, sort: keep if _n==1
        keep decade est_*

        * Long layout: one row per decade x estimate number (1 or 2)
        reshape long est_ est_lb_ est_ub_, i(decade) j(estimate)

        twoway (scatter est_ decade if estimate==2,  msymbol(diamond) mcolor(gold*1.25) msize(small) yaxis(1)) ///
               (rcap est_lb_  est_ub_  decade if estimate==2, lpatter(solid) lcolor(gold*1.25) yaxis(1) lwidth(0.5)), ///
            xti(" " "Decade of respondent's birth") xlabel(1910(10)1970) xscale(range(1905 1975)) ///
            yline(0,lpat(dash_dot) lcolor(gs9) lwidth(medthick)) legend(off) ///
            ylabel(-.1(.05)0.1, axis(1)) yti("Coefficient on survey" " ", axis(1)) ///
            xlabel(1910 "1910s" 1920 "1920s" 1930 "1930s" 1940 "1940s" 1950 "1950s" 1960 "1960s" 1970 "1970s", labsize(small) )
        graph export "$Mydirectory2/appendix_c/corr_fatherinc_surveydummy_log_series_latercensus.pdf", as(pdf) replace
    restore